package com.lzx.demo;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.Timer;
import java.util.TimerTask;

/**
 * Scheduled crawler that downloads one page of a CSDN user's article list and
 * prints the title and HTML body of every article linked from that page.
 *
 * <p>The crawl runs immediately on start-up and then once every 24 hours on a
 * {@link Timer} thread.
 *
 * @author 程序员星星
 * @date 2023/3/12
 */
public class Test {
    private static final String URL_TEMPLATE = "https://blog.csdn.net/%s/article/list/%d";
    private static final String USER_NAME = "crazy1013";
    private static final int PAGE_NUM = 1;

    /** Interval between two crawl runs: one day, in milliseconds. */
    private static final long CRAWL_PERIOD_MILLIS = 24L * 60 * 60 * 1000;

    /**
     * Starts the timer that triggers {@link #crawl()} now and then once a day.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        TimerTask task = new TimerTask() {
            @Override
            public void run() {
                try {
                    crawl();
                } catch (IOException | RuntimeException e) {
                    // An exception escaping run() terminates the Timer thread and
                    // cancels all future executions, so unchecked exceptions are
                    // reported here as well to keep the daily schedule alive.
                    System.err.println("Crawl run failed; the next scheduled run will retry");
                    e.printStackTrace();
                }
            }
        };
        Timer timer = new Timer();
        // Crawl once every day, starting immediately.
        timer.scheduleAtFixedRate(task, 0L, CRAWL_PERIOD_MILLIS);
    }

    /**
     * Fetches the configured article-list page and crawls every article linked from it.
     *
     * <p>A failure on a single article is reported to {@code System.err} and does not
     * stop the remaining articles from being processed.
     *
     * @throws IOException if the article-list page itself cannot be fetched
     */
    private static void crawl() throws IOException {
        String url = String.format(URL_TEMPLATE, USER_NAME, PAGE_NUM);
        Document doc = Jsoup.connect(url).get();
        Elements articleElements = doc.select("div.article-list div.article-item-box h4 a");
        for (Element articleElement : articleElements) {
            // absUrl resolves relative links against the page's base URI and
            // returns an empty string when no absolute URL can be built.
            String articleUrl = articleElement.absUrl("href");
            if (articleUrl.isEmpty()) {
                System.err.println("Skipping link without a usable href: " + articleElement.outerHtml());
                continue;
            }
            try {
                crawlArticle(articleUrl);
            } catch (IOException e) {
                System.err.println("Failed to fetch article " + articleUrl);
                e.printStackTrace();
            }
        }
    }

    /**
     * Downloads a single article page and prints its title and HTML body.
     *
     * <p>If the page lacks the expected title or body element, the article is
     * skipped with a message on {@code System.err}.
     *
     * @param articleUrl absolute URL of the article page
     * @throws IOException if the article page cannot be fetched
     */
    private static void crawlArticle(String articleUrl) throws IOException {
        Document articleDoc = Jsoup.connect(articleUrl).get();
        // selectFirst returns null when nothing matches the selector.
        Element articleTitleElement = articleDoc.selectFirst("h1.title-article");
        Element articleBodyElement = articleDoc.selectFirst("div#article_content");
        if (articleTitleElement == null || articleBodyElement == null) {
            System.err.println("Skipping article with unexpected page structure: " + articleUrl);
            return;
        }
        String articleTitle = articleTitleElement.text();
        String articleBody = articleBodyElement.html();
        // TODO: process the article content
        System.out.println(articleTitle);
        System.out.println(articleBody);
    }
}
